'''
Created on 20-Apr-2013

@author: cdac
'''
from SMS.DataSource import DataSource
from SMS.PreprocessSMS import PreprocessSMS
from SMS.Stats import Stats


def _print_stats(hams, spams):
    '''Print the Stats report for the ham messages, then for the spam messages.'''
    Stats(hams).print_stats()
    Stats(spams).print_stats()


def _preprocess(processor, msgs):
    '''Run msgs through the processor's two cleaning steps and return the result.

    Applies remove_puncts_and_special_chars first, then convert_to_lowercase,
    feeding the output of the first step into the second.
    '''
    msgs = processor.remove_puncts_and_special_chars(msgs)
    return processor.convert_to_lowercase(msgs)


def main():
    '''Print message statistics before and after preprocessing.

    Draws ham and spam messages through DataSource, prints their Stats,
    preprocesses both sets with PreprocessSMS, and prints the Stats again so
    the two reports can be compared.
    '''
    small_ham_file = '../Data/HamSmall.csv'
    small_spam_file = '../Data/SpamSmall.csv'

    large_ham_file = '../Data/Ham.csv'
    large_spam_file = '../Data/Spam.csv'

    # Number of messages requested from each large file.
    sample_size = 100

    # The source is built on the small files; pass large_ham_file and
    # large_spam_file here instead to build it on the large ones.
    ds = DataSource(small_ham_file, small_spam_file)

    # Get all msgs in ham and spam.
    # NOTE(review): both results are replaced by the random samples below
    # before anything reads them. The calls are kept because their side
    # effects are not visible from this script -- confirm whether they are
    # still needed.
    hams = ds.getHams()
    spams = ds.getSpams()

    # Get random msgs in ham and spam (taken from the large files).
    hams = ds.get_random_msgs(large_ham_file, sample_size)
    spams = ds.get_random_msgs(large_spam_file, sample_size)

    # Report on the messages as loaded.
    _print_stats(hams, spams)

    processor = PreprocessSMS()
    hams = _preprocess(processor, hams)
    spams = _preprocess(processor, spams)

    # Report again on the preprocessed messages.
    _print_stats(hams, spams)


if __name__ == '__main__':
    main()
    